
#########################################################################
##########               GET CHILD-YEAR DATASET               ###########
#########################################################################

library(haven)
library(readr)
library(data.table)
library(scales)
# 

# # G:\IMDB_AllYears\rdc\New_components_July_2019\Children  - Raw file
# # in SAS, converted to Stata
# 

# Directory that the final panel CSV is written under.
filepathimdb <- "H:/Zheng_10223/Derived Data"


# Child landing records (Stata copy of the SAS file
# G:\IMDB_AllYears\rdc\New_components_July_2019\Children\child_pnrf_f3_v1.sas7bdat).
# Only the columns used below are read, so the rest of the file is never
# loaded into memory.
child_landing <- read_dta(
  "H:/Zheng_10223/Joint/child_pnrf_1980_2020_f3_v1.dta",
  col_select = c("IMDB_ID", "LANDING_AGE", "BIRTH_YEAR",
                 "main_parent", "Parent1", "Parent2", "Parent3", "Parent4")
)

# Fix the column order explicitly rather than relying on the order in which
# read_dta() returns the selected columns; this order carries through the
# later merges into the output file.
child_landing <- child_landing[, c("IMDB_ID", "LANDING_AGE", "BIRTH_YEAR",
                                   "main_parent", "Parent1", "Parent2",
                                   "Parent3", "Parent4")]

# Upper-case the parent identifier columns (values are untouched).
names(child_landing) <- c("IMDB_ID", "LANDING_AGE", "BIRTH_YEAR",
                          "MAIN_PARENT", "PARENT1", "PARENT2",
                          "PARENT3", "PARENT4")

# 
# # For each child, get all income each year
# # Runtime per dataset: 3mins on station 1
# Build the child-year panel: one row per child IMDB_ID per tax year
# (1982-2019) holding the child's landing record plus that year's tax
# variables for the child, PARENT1, PARENT2 and MAIN_PARENT.
# PARENT3 / PARENT4 ids are carried from the landing file, but no tax
# variables are attached for them.

panel_years <- 1982:2019

# Year-suffixed variable stems read from each annual T1FF file. The industry
# code is added inside the loop because its stem depends on the year.
tax_stems <- c("AGE__I", "EI___I", "EI___F", "TIRC_I", "TIRC_F",
               "XTIRCI", "XTIRCF", "IEMCOI", "DEPDTI")

# Year-free names given to those columns (last entry is the industry code).
tax_names <- c("Age", "EmploymentIncome_IND", "EmploymentIncome_HH",
               "TIRC_IND", "TIRC_HH", "XTIRC_IND", "XTIRC_HH",
               "IMM_STAT", "EMIG_DATE", "NAIC")

# Variables actually attached to the panel. "Age" is read and renamed but
# not merged into the output.
tax_vars <- setdiff(tax_names, "Age")

# Output column names. The child's last three columns use an upper-case
# "CHILD_" prefix; this is kept exactly so the output layout is unchanged.
out_suffixes <- c("EmpInc_IND", "EmpInc_HH", "TIRC_IND", "TIRC_HH",
                  "XTIRC_IND", "XTIRC_HH", "IMM_STAT", "EMIG_DATE", "NAIC")
child_names <- c("Child_EmpInc_IND", "Child_EmpInc_HH",
                 "Child_TIRC_IND", "Child_TIRC_HH",
                 "Child_XTIRC_IND", "Child_XTIRC_HH",
                 "CHILD_IMM_STAT", "CHILD_EMIG_DATE", "CHILD_NAIC")

# Left-join one year's tax variables onto `dt`, matching the id held in
# column `key` of `dt` against IMDB_ID in `tax`, then rename the joined
# columns to `new_names` (same order as `tax_vars`).
attach_tax_vars <- function(dt, tax, key, new_names) {
  dt <- merge(x = dt, y = tax[, c("IMDB_ID", tax_vars)],
              by.x = key, by.y = "IMDB_ID", all.x = TRUE)
  setnames(dt, old = tax_vars, new = new_names)
  dt
}

# The set of children does not change across years, so compute it once.
child_ids <- unique(child_landing$IMDB_ID)

# Collect each year's slice in a preallocated list and bind once at the end;
# growing `panel` with rbind() inside the loop re-copies every earlier year
# on each iteration.
yearly <- vector("list", length(panel_years))

for (k in seq_along(panel_years)) {
  i <- panel_years[k]
  print(i)

  # One row per child for this year, with the child's landing columns.
  tmp <- data.table(IMDB_ID = child_ids, Year = i)
  tmp <- merge(x = tmp, y = child_landing, by = "IMDB_ID", all.x = TRUE)

  # The industry variable is read under a different stem from 2002 onward.
  indvar <- if (i > 2001) "NAIC1I" else "NAICSPI"
  src_cols <- c("IMDB_ID", paste0(c(tax_stems, indvar), i))

  tax <- read_dta(
    paste0("G:/IMDB_AllYears/rdc/IMDB_BDIM_2022_v1/data_donnees/stata/Core_IMDB/imdb_t1ff_", i, "_f3_v1.dta"),
    col_select = c("IMDB_ID", paste0(c(tax_stems, indvar), i))
  )
  # Strip the year suffix so every year's slice has identical column names.
  setnames(tax, old = src_cols, new = c("IMDB_ID", tax_names))

  # Child's own tax variables, then each linked parent's.
  tmp <- attach_tax_vars(tmp, tax, "IMDB_ID", child_names)
  tmp <- attach_tax_vars(tmp, tax, "PARENT1",
                         paste0("Parent1_", out_suffixes))
  tmp <- attach_tax_vars(tmp, tax, "PARENT2",
                         paste0("Parent2_", out_suffixes))
  tmp <- attach_tax_vars(tmp, tax, "MAIN_PARENT",
                         paste0("MainParent_", out_suffixes))

  yearly[[k]] <- tmp
}

# use.names = TRUE matches the column-matching rule of data.table's rbind().
panel <- rbindlist(yearly, use.names = TRUE)

# Write the panel to the derived-data directory.
# file.path() inserts the "/" between directory and file name; pasting the
# two with sep = "" produced ".../Derived Datachild_year_panel2025.csv",
# i.e. a file next to the directory instead of inside it.
# NOTE(review): any downstream script that reads the old, separator-less
# path needs to be pointed at the new location.
fwrite(panel, file.path(filepathimdb, "child_year_panel2025.csv"))

